/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// scalastyle:off println
package org.zjt.spark.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
// $example off$
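
/**
 * Trains a binary logistic regression model with L-BFGS on a LIBSVM dataset, prints the
 * predicted scores on the test set, and reports a simple accuracy figure. The commented-out
 * block at the end shows the full BinaryClassificationMetrics evaluation
 * (precision, recall, F-measure, PR and ROC curves).
 */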

object BinaryClassificationMetricsExample {

  def main(args: Array[String]): Unit = {

    // Run locally with two worker threads; change the master for a cluster deployment
    val conf = new SparkConf()
      .setAppName("BinaryClassificationMetricsExample")
      .setMaster("local[2]")
    val sc = new SparkContext(conf)
    // $example on$
    // Load training data in LIBSVM format (local path; adjust to your environment)
    val dataPath = "/Users/zhangjuntao/IdeaProjects/myproject/hw-bigdata/scala-demo/" +
      "src/main/resource/mllib/sample_binary_classification_data.txt"
    val data = MLUtils.loadLibSVMFile(sc, dataPath)

    // Split data into training (60%) and test (40%); the fixed seed makes the split reproducible
    val Array(training, test) = data.randomSplit(Array(0.6, 0.4), seed = 11L)
    training.cache()

    // Run the training algorithm to build the model; setNumClasses is the number of classes K
    // (2 for this binary classification dataset)
    val model = new LogisticRegressionWithLBFGS()
      .setNumClasses(2)
      .run(training)

    // Clear the prediction threshold so the model returns probability scores instead of 0/1 labels
    model.clearThreshold()


    // Compute raw scores on the test set
    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
      val prediction = model.predict(features)
      (prediction, label)
    }.persist()

    // Print each (score, label) pair; with the threshold cleared, the score is the predicted
    // probability of the positive class
    predictionAndLabels.foreach { case (score, label) =>
      println(s"prediction: $score\tlabel: $label")
    }

    // Simple accuracy check: threshold each score at 0.5 and compare against the label
    val correct = predictionAndLabels.filter { case (score, label) =>
      (if (score >= 0.5) 1.0 else 0.0) == label
    }.count().toDouble
    val total = predictionAndLabels.count().toDouble
    println(s"Accuracy: ${correct / total}")
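
    // The commented-out block below shows how BinaryClassificationMetrics derives
    // threshold-based precision, recall, F-measure, and the PR and ROC curves
    // from the same (score, label) pairs.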
    /*

        // Instantiate the metrics object
        val metrics = new BinaryClassificationMetrics(predictionAndLabels)


        // Precision by threshold
        val precision = metrics.precisionByThreshold
        precision.foreach { case (t, p) =>
          println(s"Threshold: $t, Precision: $p")
        }


        // Recall by threshold
        val recall = metrics.recallByThreshold
        recall.foreach { case (t, r) =>
          println(s"Threshold: $t, Recall: $r")
        }


        // Precision-Recall Curve
        val PRC = metrics.pr

        // F-measure
        val f1Score = metrics.fMeasureByThreshold
        f1Score.foreach { case (t, f) =>
          println(s"Threshold: $t, F-score: $f, Beta = 1")
        }

        val beta = 0.5
        val fScore = metrics.fMeasureByThreshold(beta)
        fScore.foreach { case (t, f) =>
          println(s"Threshold: $t, F-score: $f, Beta = 0.5")
        }

        // AUPRC
        val auPRC = metrics.areaUnderPR
        println("Area under precision-recall curve = " + auPRC)

        // Compute thresholds used in ROC and PR curves
        val thresholds = precision.map(_._1)

        // ROC Curve
        val roc = metrics.roc

        // AUROC
        val auROC = metrics.areaUnderROC
        println("Area under ROC = " + auROC)*/
    // $example off$
    sc.stop()
  }
}

// scalastyle:on println
